Data

Basic descriptives of overall activity

# PER INFLUENCER
# Add PROFILE, the URL segment captured right after ".com/" (gsub() returns
# the URL unchanged when the pattern does not match), and convert the result
# to a data.table.
tw <- as.data.table(
  mutate(tw, PROFILE = gsub("^.*\\.com/([^/]+).*", "\\1", URL))
)

# Profile-level rankings: each pipeline aggregates `tw` by PROFILE, sorts in
# descending order, keeps at most the first 1000 rows and renders them as a
# scrollable DT table.
# NOTE(review): mean()/sum() are called without na.rm, so a profile with any
# missing value gets NA for that metric -- confirm this is intended.

# most active profiles (row count per profile; grouping already yields one
# row per PROFILE, so no de-duplication is needed)
tw[, .N, by = PROFILE][order(-N)] %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most popular (mean follower count)
tw %>%
  group_by(PROFILE) %>%
  summarise(FOLLOW = mean(FOLLOWERS_COUNT)) %>%
  arrange(desc(FOLLOW)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential (total reach)
tw %>%
  group_by(PROFILE) %>%
  summarise(REACH = sum(REACH)) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential II (total interactions)
tw %>%
  group_by(PROFILE) %>%
  summarise(INTERACTIONS = sum(INTERACTIONS)) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most appreciated (total favorites)
tw %>%
  group_by(PROFILE) %>%
  summarise(FAVORITE = sum(FAVORITE_COUNT)) %>%
  arrange(desc(FAVORITE)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most retweeted (total retweets)
tw %>%
  group_by(PROFILE) %>%
  summarise(RETWEET = sum(RETWEET_COUNT)) %>%
  arrange(desc(RETWEET)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# PER TWEET
# Each table lists individual rows of `tw` sorted by one metric (descending),
# limited to the first 1000 rows.

# most popular (by follower count)
tw %>%
  select(PROFILE, FULL_TEXT, FOLLOWERS_COUNT, URL) %>%
  arrange(desc(FOLLOWERS_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential (by reach)
tw %>%
  select(PROFILE, FULL_TEXT, REACH, URL) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential II (by interactions)
tw %>%
  select(PROFILE, FULL_TEXT, INTERACTIONS, URL) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most appreciated (by favorites)
tw %>%
  select(PROFILE, FULL_TEXT, FAVORITE_COUNT, URL) %>%
  arrange(desc(FAVORITE_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most retweeted (by retweets)
tw %>%
  select(PROFILE, FULL_TEXT, RETWEET_COUNT, URL) %>%
  arrange(desc(RETWEET_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

Check Twitter activity on the CRO supply side

# select relevant CRO profiles: values of FROM that occur more than 5 times,
# ordered by descending frequency (grouping already yields one row per FROM,
# so no de-duplication is needed)
CRO_TW <- tw[, .N, by = FROM][order(-N)][N > 5, FROM]

# keep only the rows sent from those accounts; PROFILE is derived from URL
# with the same expression that is applied to `tw` earlier in the script
CTW <- tw[FROM %in% CRO_TW] %>%
  mutate(PROFILE = gsub("^.*\\.com/([^/]+).*", "\\1", URL))

# Profile-level rankings restricted to `CTW`: aggregate by PROFILE, sort in
# descending order, show at most the first 1000 rows.

# most popular (mean follower count)
CTW %>%
  group_by(PROFILE) %>%
  summarise(FOLLOW = mean(FOLLOWERS_COUNT)) %>%
  arrange(desc(FOLLOW)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential (total reach)
CTW %>%
  group_by(PROFILE) %>%
  summarise(REACH = sum(REACH)) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential II (total interactions)
CTW %>%
  group_by(PROFILE) %>%
  summarise(INTERACTIONS = sum(INTERACTIONS)) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most appreciated (total favorites)
CTW %>%
  group_by(PROFILE) %>%
  summarise(FAVORITE = sum(FAVORITE_COUNT)) %>%
  arrange(desc(FAVORITE)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most retweeted (total retweets)
CTW %>%
  group_by(PROFILE) %>%
  summarise(RETWEET = sum(RETWEET_COUNT)) %>%
  arrange(desc(RETWEET)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# PER TWEET
# Each table lists individual rows of `CTW` sorted by one metric
# (descending), limited to the first 1000 rows.

# most popular (by follower count)
CTW %>%
  select(PROFILE, FULL_TEXT, FOLLOWERS_COUNT, URL) %>%
  arrange(desc(FOLLOWERS_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential (by reach)
CTW %>%
  select(PROFILE, FULL_TEXT, REACH, URL) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most influential II (by interactions)
CTW %>%
  select(PROFILE, FULL_TEXT, INTERACTIONS, URL) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most appreciated (by favorites)
CTW %>%
  select(PROFILE, FULL_TEXT, FAVORITE_COUNT, URL) %>%
  arrange(desc(FAVORITE_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

# most retweeted (by retweets)
CTW %>%
  select(PROFILE, FULL_TEXT, RETWEET_COUNT, URL) %>%
  arrange(desc(RETWEET_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

Check forum activity

##             word sentiment brija
##  1:        gould   0.58639   POZ
##  2:   izgovarati   0.56071   NEG
##  3:      nebeski   0.59666   POZ
##  4:         opus   0.36952   NEG
##  5:     poslenik   0.27655   POZ
##  6:       brodar   0.39643   POZ
##  7:      schultz   0.14534   NEG
##  8:        ingra   0.30493   NEG
##  9:    pridavati   0.57727   POZ
## 10:         kuta   0.60072   NEG
## 11:  ekskluzivan   0.50823   NEG
## 12:       toranj   0.50855   POZ
## 13: selektivnost   0.19562   NEG
## 14:   kapetanica   0.32821   NEG
## 15:         doći   0.39604   POZ
# convert the forum data to a data.table
forum <- as.data.table(forum)

# number of rows per TITLE, most frequent first (at most 1000 rows shown)
forum[, .N, by = TITLE][order(-N)] %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Tokenise FULL_TEXT of the selected thread into one `word` per row.
ZM_token <- forum[
  TITLE == "Zoran Milanović, predsjednik Republike Hrvatske vol. IV"
] %>%
  unnest_tokens(word, FULL_TEXT)

# Remove stop words, missing tokens, tokens containing digits and single
# letters. The conditions are applied directly instead of first overwriting
# matches with NA and filtering the NAs afterwards; the same rows are kept.
# NOTE(review): the single-letter pattern is ASCII-only, so one-character
# tokens such as "š" or "č" are retained -- confirm this is intended.
ZM_tokenTidy <- ZM_token %>%
  anti_join(stop_corpus, by = "word") %>%
  filter(
    !is.na(word),
    !grepl("\\d+", word),
    !grepl("^[a-zA-Z]$", word)
  )

# token frequencies, most frequent first
ZM_tokenTidy[, .N, by = word][order(-N)]
##               word    N
##     1:       quote 3264
##     2:   milanović 1643
##     3:         hdz 1274
##     4:        onda  838
##     5: predsjednik  811
##    ---                 
## 38457:       sišao    1
## 38458:    oblacima    1
## 38459:      smotre    1
## 38460:   rasipanje    1
## 38461:    hašomana    1
## Visualize the most common words (tokens occurring more than 500 times)
# Bars are ordered by frequency through reorder(); the axes are flipped so
# the words are listed vertically.
ZM_tokenTidy[, .N, by = word][N > 500] %>%
  ggplot(aes(x = reorder(word, N), y = N)) +
  geom_col() +
  xlab(NULL) +
  coord_flip() +
  theme_economist()